import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import plotly.express as px
import scipy.stats as stats
def read_data(addr_string, index):
    """Load one worksheet of an Excel workbook.

    Parameters
    ----------
    addr_string
        Path (or other source accepted by ``pandas.read_excel``) of the
        workbook.
    index
        Sheet selector, forwarded to ``pandas.read_excel`` as its
        ``sheet_name`` argument.

    Returns
    -------
    The object produced by ``pandas.read_excel`` for that sheet selector.
    """
    sheet = pd.read_excel(addr_string, sheet_name=index)
    return sheet
# --- Load -------------------------------------------------------------------
# Workbook location; the sheet selector 1 is handed to read_data unchanged.
path = r'C:\Users\rishi\Downloads\Projects\Bank Loans\Bank_Personal_Loan_Modelling.xlsx'
df = read_data(path, 1)

# Bare expressions such as the ones below only produce output when the code is
# executed interactively (notebook / REPL); in a plain script they are no-ops.
df.shape
df.isnull().sum()

# Remove the 'ID' and 'ZIP Code' columns in place.
df.drop(['ID', 'ZIP Code'], axis=1, inplace=True)
df.head()
df.columns

# --- Univariate overview ----------------------------------------------------
px.box(
    df,
    y=['Age', 'Experience', 'Income', 'Family', 'Education'],
    title='5 point summary of the basic data',
)
df.skew()
df.hist(figsize=(20, 20))

# sns.distplot is deprecated in seaborn; histplot with a KDE overlay on a
# density scale is its replacement.  A fresh figure is opened first so the
# plot does not land on the axes left behind by df.hist above.
plt.figure()
sns.histplot(df['Experience'], kde=True, stat='density')
df['Experience'].mean()

# --- Negative 'Experience' values --------------------------------------------
Negative_exp = df[df['Experience'] < 0]
plt.figure()
sns.histplot(Negative_exp['Age'], kde=True, stat='density')
Negative_exp['Experience'].mean()
# NOTE: .size is rows * columns, not the number of rows.
Negative_exp.size

# Work on a copy so df keeps the raw values.
data = df.copy()
# Replace negative experience with the mean of the valid (non-negative)
# entries; using the mean of the whole column would let the very values being
# replaced pull the fill value down.
data['Experience'] = np.where(
    data['Experience'] < 0,
    data.loc[data['Experience'] >= 0, 'Experience'].mean(),
    data['Experience'],
)
# Sanity check: this selection should now be empty.
data[data['Experience'] < 0]

# --- Correlations -----------------------------------------------------------
# The heatmap is computed from df, i.e. it still uses the raw 'Experience'
# column rather than the imputed one in data.
plt.figure(figsize=(10, 6))
sns.heatmap(df.corr(), annot=True)
plt.title('Relationship of Variables')

# 'Experience' is removed from the working frame from here on.
data = data.drop(['Experience'], axis=1)
data['Education'].unique()
def mark(x):
    """Translate an education code into its text label.

    ``1`` maps to ``'Undergrad'``, ``2`` maps to ``'Graduate'`` and every
    other value maps to ``'Professional'``.
    """
    known_levels = ((1, 'Undergrad'), (2, 'Graduate'))
    for code, label in known_levels:
        if x == code:
            return label
    # Anything that is neither 1 nor 2 falls into the last category.
    return 'Professional'
# Text label for each row's education code, produced by mark().
data['Edu_Mark'] = data['Education'].map(mark)

# Number of non-null 'Age' entries per education label.
edu_dis = data.groupby('Edu_Mark')['Age'].count()

# Share of customers per education label.
px.pie(
    data,
    values=edu_dis,
    names=edu_dis.index,
    title='Customer Educational Experience',
)
def Security_CD(row):
    """Label a row by its 'Securities Account' / 'CD Account' flags.

    ``row`` must support lookup by the keys ``'Securities Account'`` and
    ``'CD Account'``.  Each of the four 0/1 combinations yields its own
    label; any other combination yields ``None``.
    """
    securities = row['Securities Account']
    deposits = row['CD Account']

    if securities == 1:
        if deposits == 1:
            return 'Holds Securities and Deposits'
        if deposits == 0:
            return 'Holds Securities'
    elif securities == 0:
        if deposits == 0:
            return "Doesn't hold Securities and Deposits"
        if deposits == 1:
            return 'Holds Deposits'

    # Flags outside {0, 1} match no category.
    return None
# Row-wise category derived from the securities / CD account flags.
data['Account_holder_category'] = data.apply(Security_CD, axis='columns')
data.head()

# Frequency of each category, shown as a donut chart.
values = data['Account_holder_category'].value_counts()
fig = px.pie(
    data,
    values=values,
    names=values.index,
    title='Customers with securities and deposit',
    hole=0.5,
)
fig.show()

# Income by education code, one facet per 'Personal Loan' value.
px.box(
    data,
    x='Education',
    y='Income',
    facet_col='Personal Loan',
    title='Customer on the basis of Education status and Income for Personal Loan',
)
def plot(col1, col2, label1, label2, title):
    """Overlay the density of ``col1`` for the two groups of ``col2``.

    Reads the module-level ``data`` frame and draws, on a new 12x8 figure,
    one kernel-density curve of ``data[col1]`` for the rows where
    ``data[col2] == 0`` and one for the rows where ``data[col2] == 1``.

    Parameters
    ----------
    col1 : column whose distribution is drawn.
    col2 : column used to split the rows (compared against 0 and 1).
    label1 : legend entry for the ``col2 == 0`` curve.
    label2 : legend entry for the ``col2 == 1`` curve.
    title : title of the figure.
    """
    plt.figure(figsize=(12, 8))
    # sns.distplot(..., hist=False) is deprecated; sns.kdeplot is the
    # replacement for a KDE-only curve.
    for flag, label in ((0, label1), (1, label2)):
        sns.kdeplot(data.loc[data[col2] == flag, col1], label=label)
    plt.legend()
    plt.title(title)
# Density comparison (no loan vs. loan) for three numeric columns.
plot('Income', 'Personal Loan',
     'Customers Income not availing Personal Loan',
     'Customers Income availing Personal Loan',
     'Income Distribution')
plot('CCAvg', 'Personal Loan',
     'Credit Card Avg with availing no Personal Loan',
     'Credit Card Avg with availing Personal Loan',
     'Credit Card Avg Distribution')
plot('Mortgage', 'Personal Loan',
     'Mortgage with no Personal Loan',
     'Mortgage with Personal Loan',
     'Mortgage Distribution')

# One count plot per column, split by 'Personal Loan'.
col_names = ['Securities Account', 'Account_holder_category', 'CreditCard', 'Online']
for i in col_names:
    plt.figure(figsize=(10, 5))
    sns.countplot(x=i, hue='Personal Loan', data=data)
    plt.title('categories of customer on the basis of {}'.format(i))

# x and y must be passed by keyword: seaborn 0.12+ no longer accepts them
# positionally.  A new figure keeps the scatter off the last count plot.
plt.figure()
sns.scatterplot(x=data['Age'], y=data['Personal Loan'], hue=data['Family'])
plt.title("Age and Personal Loan Relationship")

# Null / alternative hypothesis texts for the Age test that follows.
Ho = "Age Doesn't have impact on personal loan"
Ha = "Age Does have impact on personal loan"
def hypo(col1, col2, H0, HA):
    """Run a two-sample t-test on ``col2`` split by ``col1`` and plot both.

    Reads the module-level ``data`` frame.  The values of ``data[col2]`` are
    split into the rows where ``data[col1] == 0`` and the rows where
    ``data[col1] == 1`` and compared with ``scipy.stats.ttest_ind``.

    Parameters
    ----------
    col1 : column used to form the two groups (compared against 0 and 1).
    col2 : column whose values are tested.
    H0 : text printed when the p-value is not below 0.05.
    HA : text printed when the p-value is below 0.05.

    Returns
    -------
    None.  The verdict is printed and a scatter plot of ``col1`` against
    ``col2`` is drawn on a new figure.
    """
    arr1 = np.array(data[data[col1] == 0][col2])
    arr2 = np.array(data[data[col1] == 1][col2])

    # Only the p-value is used; the t statistic is discarded.
    # NOTE(review): no equal_var argument is passed, so SciPy's default
    # variance assumption applies - confirm that is intended.
    _, p_value = stats.ttest_ind(arr1, arr2, axis=0)

    if p_value < 0.05:
        print(HA, ' as the p value is less then 0.05 with a value of {}'.format(p_value))
    else:
        print(H0, ' as the p value is more then 0.05 with a value of {}'.format(p_value))

    # New figure per call so successive calls do not draw on the same axes.
    plt.figure()
    # x / y are keyword-only in seaborn 0.12+.
    sns.scatterplot(x=data[col1], y=data[col2])
    plt.title(col1 + " and " + col2 + " Relation")
# Age: uses the Ho / Ha texts that are already bound at this point.
hypo(col1='Personal Loan', col2='Age', H0=Ho, HA=Ha)

# Income.
Ho, Ha = (
    "Income Doesn't have impact on personal loan",
    "Income Does have impact on personal loan",
)
hypo(col1='Personal Loan', col2='Income', H0=Ho, HA=Ha)

# Family.
Ho, Ha = (
    "Family Doesn't have impact on personal loan",
    "Family Does have impact on personal loan",
)
hypo(col1='Personal Loan', col2='Family', H0=Ho, HA=Ha)